from paddleocr import PaddleOCR, draw_ocr
import argparse
import os


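# PaddleOCR currently supports Chinese & English, English, French, German,
# Korean and Japanese; switch between them by changing the `lang` parameter.
# The corresponding values are `ch`, `en`, `french`, `german`, `korean`, `japan`.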

# Command-line interface: one option naming the directory to process.
parser = argparse.ArgumentParser(prog="video2images_ocr")

parser.add_argument(
    "--save_dir",
    "-s",
    default='./output',
    type=str,
    help='The save dir to images',
)


def use_ocr(_save_dir):
    """Run OCR on every file in ``_save_dir`` and save the text next to it.

    Each directory entry is printed. Sub-directories and ``.txt`` files
    (the outputs this function itself writes) are skipped. For every other
    entry the recognized lines are written, one per line, to a UTF-8 file
    with the same base name and a ``.txt`` extension inside ``_save_dir``.

    Args:
        _save_dir: Path of the directory containing the images. A trailing
            path separator is optional.

    Returns:
        None.
    """
    ocr = None  # Built on first use so a directory with no inputs costs nothing.
    for filename in os.listdir(_save_dir):
        print(filename)
        # Join with the directory: the bare name would be resolved against
        # the current working directory instead of ``_save_dir``.
        img_path = os.path.join(_save_dir, filename)
        if os.path.isdir(img_path):
            continue

        # splitext keeps dots inside the base name ("frame_0.5.jpg" ->
        # "frame_0.5"), so distinct inputs do not share one output file.
        stem, ext = os.path.splitext(filename)
        if ext.lower() == '.txt':
            # Text files left by a previous run are outputs, not images.
            continue

        if ocr is None:
            # One engine is reused for all files instead of one per file.
            ocr = PaddleOCR(use_angle_cls=True, lang="ch",
                            use_gpu=False)

        result = ocr.ocr(img_path, cls=True)
        # NOTE(review): depending on the PaddleOCR release, ``result`` or
        # ``result[0]`` may be None when nothing is detected or the file
        # cannot be read -- confirm; either case is treated as "no lines".
        detections = (result[0] if result else None) or []
        # Element [1][0] of each detection is used as the recognized text.
        text = "".join(entry[1][0] + '\n' for entry in detections)

        txt_path = os.path.join(_save_dir, stem + '.txt')
        # Explicit UTF-8: the platform default encoding may be unable to
        # represent the recognized (Chinese) text.
        with open(txt_path, 'w', encoding='utf-8') as f:
            f.write(text)


if __name__ == '__main__':
    # Script entry point: read the target directory from the command line,
    # echo it, then OCR everything inside it.
    cli_args = parser.parse_args()
    target_dir = cli_args.save_dir
    print(target_dir)
    use_ocr(target_dir)
